{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import random\n",
    "import json\n",
    "import jsonlines\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_data(file_path):\n",
    "    data_file = open(file_path)\n",
    "    data = []\n",
    "    for line in data_file:\n",
    "        data.append(json.loads(line))\n",
    "    data = pd.DataFrame(data)\n",
    "    data_file.close()\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['business_id', 'name', 'address', 'city', 'state', 'postal_code',\n",
      "       'latitude', 'longitude', 'stars', 'review_count', 'is_open',\n",
      "       'attributes', 'categories', 'hours'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "file_path = \"/<YOUR_OWN_PATH>/ToolQA/data/raw_data/yelp/yelp_academic_dataset_business.json\"\n",
    "data = read_data(file_path)\n",
    "print(data.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_questions_per_template = 9\n",
    "question_id = 0\n",
    "questions = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0000', 'question': 'What is the address of Snip Philadelphia in area with postal code 19130?', 'answer': '2052 Fairmount Ave'}, {'qid': 'easy-yelp-0001', 'question': 'What is the address of Smilies in area with postal code T5V 1H9?', 'answer': '15003 118 Ave'}, {'qid': 'easy-yelp-0002', 'question': 'What is the address of Cutting Edge Upholstery Solutions in area with postal code 33611?', 'answer': '4338 S Manhattan Ave'}, {'qid': 'easy-yelp-0003', 'question': 'What is the address of FedEx Office Print & Ship Center in area with postal code 33612?', 'answer': '2798 E Fowler Ave'}, {'qid': 'easy-yelp-0004', 'question': 'What is the address of Walmart Supercenter in area with postal code 37211?', 'answer': '5824 Nolensville Pike, 4040 Nolensville Pike'}, {'qid': 'easy-yelp-0005', 'question': 'What is the address of Level Seven Salon in area with postal code 19460?', 'answer': '427 Schuylkill Rd'}, {'qid': 'easy-yelp-0006', 'question': 'What is the address of Legacy Park Apartments in area with postal code 46112?', 'answer': '6750 Legacy Park Dr'}, {'qid': 'easy-yelp-0007', 'question': \"What is the address of Domino's Pizza in area with postal code 08103?\", 'answer': '446 S Broadway'}, {'qid': 'easy-yelp-0008', 'question': 'What is the address of Best Auto Detailz in area with postal code T6B 3C5?', 'answer': '6916 68 Avenue NW'}, {'qid': 'easy-yelp-0009', 'question': 'What is the address of 7-Eleven in area with postal code 37203?', 'answer': '3400 West End Ave'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template+1):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"What is the address of {} in area with postal code {}?\".format(name, postal_code)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"postal_code\"] == postal_code)]\n",
    "    address = str(\", \".join(list(sub_table[\"address\"].unique())))\n",
    "    answer = address\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0009', 'question': 'What is the address of 7-Eleven in area with postal code 37203?', 'answer': '3400 West End Ave'}, {'qid': 'easy-yelp-0010', 'question': 'Which city is Ali Baba Cafe & Hookah Bar located in PA?', 'answer': 'Philadelphia'}, {'qid': 'easy-yelp-0011', 'question': 'Which city is The Pink Daisy located in PA?', 'answer': 'Yardley'}, {'qid': 'easy-yelp-0012', 'question': 'Which city is Mi Hacienda Mexican Restaurant located in IN?', 'answer': 'Carmel'}, {'qid': 'easy-yelp-0013', 'question': 'Which city is Guild Mortgage Company located in ID?', 'answer': 'Eagle'}, {'qid': 'easy-yelp-0014', 'question': 'Which city is Riverview Restaurant & Marina located in TN?', 'answer': 'Ashland City'}, {'qid': 'easy-yelp-0015', 'question': 'Which city is Famous Tate Appliance & Bedding Center located in FL?', 'answer': 'Lutz, Wesley Chapel, Largo, Tampa, Brandon, Spring Hill, Port Richey, Oldsmar'}, {'qid': 'easy-yelp-0016', 'question': \"Which city is Knapp's Cyclery located in NJ?\", 'answer': 'Trenton'}, {'qid': 'easy-yelp-0017', 'question': 'Which city is Francis The Duke Barber Co. located in PA?', 'answer': 'Philadelphia'}, {'qid': 'easy-yelp-0018', 'question': 'Which city is Whitewater Park Apartments located in ID?', 'answer': 'Boise'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    state = row[\"state\"]\n",
    "    question = \"Which city is {} located in {}?\".format(name, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"state\"] == state)]\n",
    "    city = str(\", \".join(list(sub_table[\"city\"].unique())))\n",
    "    answer = city\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0018', 'question': 'Which city is Whitewater Park Apartments located in ID?', 'answer': 'Boise'}, {'qid': 'easy-yelp-0019', 'question': 'What state is Gelato Aromi located in?', 'answer': 'PA'}, {'qid': 'easy-yelp-0020', 'question': 'What state is America Bar & Grill located in?', 'answer': 'PA'}, {'qid': 'easy-yelp-0021', 'question': 'What state is Plumber of Tucson located in?', 'answer': 'AZ'}, {'qid': 'easy-yelp-0022', 'question': 'What state is Luna Hair located in?', 'answer': 'PA'}, {'qid': 'easy-yelp-0023', 'question': 'What state is Sun Ray Grill located in?', 'answer': 'LA'}, {'qid': 'easy-yelp-0024', 'question': 'What state is China Bowl Restaurant located in?', 'answer': 'PA'}, {'qid': 'easy-yelp-0025', 'question': 'What state is Fence USA located in?', 'answer': 'NJ'}, {'qid': 'easy-yelp-0026', 'question': \"What state is Nonno's Bakery located in?\", 'answer': 'PA'}, {'qid': 'easy-yelp-0027', 'question': 'What state is Schweiger Dermatology Group located in?', 'answer': 'PA'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    criteria = 0\n",
    "    while criteria != 1:\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        name = row[\"name\"]\n",
    "        question = \"What state is {} located in?\".format(name)\n",
    "        sub_table = data.loc[(data[\"name\"] == name)]\n",
    "        state = str(\", \".join(list(sub_table[\"state\"].unique())))\n",
    "        answer = state\n",
    "        criteria = len(list(sub_table[\"state\"].unique()))\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0027', 'question': 'What state is Schweiger Dermatology Group located in?', 'answer': 'PA'}, {'qid': 'easy-yelp-0028', 'question': 'What is the postal code of Ceviche Tapas Bar & Restaurant in Clearwater, FL?', 'answer': '33759'}, {'qid': 'easy-yelp-0029', 'question': 'What is the postal code of Josette Tkacik in Santa Barbara, CA?', 'answer': '93101'}, {'qid': 'easy-yelp-0030', 'question': 'What is the postal code of Vogue Nails in Somerdale, NJ?', 'answer': '08083'}, {'qid': 'easy-yelp-0031', 'question': \"What is the postal code of Arby's in Troy, IL?\", 'answer': '62294'}, {'qid': 'easy-yelp-0032', 'question': \"What is the postal code of Snoozy's Cafe in Schwenksville, PA?\", 'answer': '19473'}, {'qid': 'easy-yelp-0033', 'question': 'What is the postal code of ITT Technical Institute in Indianapolis, IN?', 'answer': '46268'}, {'qid': 'easy-yelp-0034', 'question': 'What is the postal code of Cynergy Tax in Christiana, DE?', 'answer': '19702'}, {'qid': 'easy-yelp-0035', 'question': 'What is the postal code of J & S Mechanical in Hendersonville, TN?', 'answer': '37075'}, {'qid': 'easy-yelp-0036', 'question': 'What is the postal code of China in Bombay in Ballwin, MO?', 'answer': '63011'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    criteria = 0\n",
    "    while criteria != 1:\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        name = row[\"name\"]\n",
    "        city = row[\"city\"]\n",
    "        state = row[\"state\"]\n",
    "        question = \"What is the postal code of {} in {}, {}?\".format(name, city, state)\n",
    "        sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state)]\n",
    "        postal_code = str(\", \".join(list(sub_table[\"postal_code\"].unique())))\n",
    "        answer = postal_code\n",
    "        criteria = len(list(sub_table[\"postal_code\"].unique()))\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0036', 'question': 'What is the postal code of China in Bombay in Ballwin, MO?', 'answer': '63011'}, {'qid': 'easy-yelp-0037', 'question': 'What is the star rating of Ent Specialists in area with postal code 89502, Reno, NV?', 'answer': '3.5'}, {'qid': 'easy-yelp-0038', 'question': 'What is the star rating of Homeward Bound Veterinary Services in area with postal code 89523, Reno, NV?', 'answer': '5.0'}, {'qid': 'easy-yelp-0039', 'question': 'What is the star rating of Perenn Bakery in area with postal code 89511, Reno, NV?', 'answer': '4.5'}, {'qid': 'easy-yelp-0040', 'question': 'What is the star rating of Hermann London Real Estate Group in area with postal code 63143, St. Louis, MO?', 'answer': '3.0'}, {'qid': 'easy-yelp-0041', 'question': 'What is the star rating of Rural King in area with postal code 62226, Swansea, IL?', 'answer': '2.5'}, {'qid': 'easy-yelp-0042', 'question': 'What is the star rating of Red Mango in area with postal code 19153, Philadelphia, PA?', 'answer': '3.0'}, {'qid': 'easy-yelp-0043', 'question': 'What is the star rating of The Scented Leaf in area with postal code 85701, Tucson, AZ?', 'answer': '4.5'}, {'qid': 'easy-yelp-0044', 'question': 'What is the star rating of The Home Depot in area with postal code 08081, Sicklerville, NJ?', 'answer': '2.0'}, {'qid': 'easy-yelp-0045', 'question': 'What is the star rating of Golden Donut in area with postal code 19147, Philadelphia, PA?', 'answer': '3.5'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"What is the star rating of {} in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    stars = str(\", \".join([str(i) for i in list(sub_table[\"stars\"].unique())]))\n",
    "    answer = stars\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0045', 'question': 'What is the star rating of Golden Donut in area with postal code 19147, Philadelphia, PA?', 'answer': '3.5'}, {'qid': 'easy-yelp-0046', 'question': 'How many reviews does South Shore Dental Excellence receive in the area with postal code 33534, Gibsonton, FL, received?', 'answer': '10'}, {'qid': 'easy-yelp-0047', 'question': 'How many reviews does Le Onde Ristorante receive in the area with postal code 19027, Elkins Park, PA, received?', 'answer': '19'}, {'qid': 'easy-yelp-0048', 'question': 'How many reviews does Las Cebollas Mexican Grill receive in the area with postal code 37115, Madison, TN, received?', 'answer': '7'}, {'qid': 'easy-yelp-0049', 'question': 'How many reviews does Fifth Street Deli & Market receive in the area with postal code 85711, Tucson, AZ, received?', 'answer': '86'}, {'qid': 'easy-yelp-0050', 'question': 'How many reviews does Number 1 Beauty Supply receive in the area with postal code 19143, Philadelphia, PA, received?', 'answer': '7'}, {'qid': 'easy-yelp-0051', 'question': 'How many reviews does Goodyear Auto Service receive in the area with postal code 34683, Palm Harbor, FL, received?', 'answer': '10'}, {'qid': 'easy-yelp-0052', 'question': 'How many reviews does Iron Hill Brewery TapHouse receive in the area with postal code 19341, Exton, PA, received?', 'answer': '41'}, {'qid': 'easy-yelp-0053', 'question': 'How many reviews does Cherokee Store receive in the area with postal code 33611, Tampa, FL, received?', 'answer': '9'}, {'qid': 'easy-yelp-0054', 'question': 'How many reviews does Cool Springs Family Medicine receive in the area with postal code 37067, Franklin, TN, received?', 'answer': '29'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"How many reviews does {} receive in the area with postal code {}, {}, {}, received?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    review_count = str(\", \".join([str(i) for i in list(sub_table[\"review_count\"].unique())]))\n",
    "    answer = review_count\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0054', 'question': 'How many reviews does Cool Springs Family Medicine receive in the area with postal code 37067, Franklin, TN, received?', 'answer': '29'}, {'qid': 'easy-yelp-0055', 'question': 'Is Melodies Cafe still open in area with postal code 19003, Ardmore, PA?', 'answer': 'No'}, {'qid': 'easy-yelp-0056', 'question': 'Is Potter Family Eye Care still open in area with postal code 46055, McCordsville, IN?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0057', 'question': \"Is Denny's still open in area with postal code T6L 7A7, Edmonton, AB?\", 'answer': 'Yes'}, {'qid': 'easy-yelp-0058', 'question': 'Is Einstein Bros. Bagels still open in area with postal code 33609, Tampa, FL?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0059', 'question': 'Is Westampton Family Diner still open in area with postal code 08060, Mount Holly, NJ?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0060', 'question': 'Is The Farmacy NJ still open in area with postal code 08065, Palmyra, NJ?', 'answer': 'No'}, {'qid': 'easy-yelp-0061', 'question': 'Is Saigon Noodle Bar still open in area with postal code 19118, Philadelphia, PA?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0062', 'question': 'Is A & D Automotive Center still open in area with postal code 33602, Tampa, FL?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0063', 'question': 'Is William Whitfield Park still open in area with postal code 37211, Nashville, TN?', 'answer': 'Yes'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"Is {} still open in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    is_open = sub_table.iloc[0][\"is_open\"]\n",
    "    if is_open == 0:\n",
    "        answer = \"No\"\n",
    "    else:\n",
    "        answer = \"Yes\"\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0063', 'question': 'Is William Whitfield Park still open in area with postal code 37211, Nashville, TN?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0064', 'question': 'Does Mayuri Indian Restaurant require appointment in area with postal code 33617, Tampa, FL?', 'answer': 'No'}, {'qid': 'easy-yelp-0065', 'question': 'Does Tequila Sunrise require appointment in area with postal code 46038, Fishers, IN?', 'answer': 'No'}, {'qid': 'easy-yelp-0066', 'question': 'Does Nordvind Resort require appointment in area with postal code 33706, Treasure Is, FL?', 'answer': 'No'}, {'qid': 'easy-yelp-0067', 'question': 'Does Santa Barbara Brewery Tours require appointment in area with postal code 93101, Santa Barbara, CA?', 'answer': 'No'}, {'qid': 'easy-yelp-0068', 'question': \"Does Mi Jalapeno require appointment in area with postal code 62269, O'Fallon, IL?\", 'answer': 'No'}, {'qid': 'easy-yelp-0069', 'question': \"Does Franklin's Classic Steaks & Hoagies require appointment in area with postal code 19468, Limerick, PA?\", 'answer': 'No'}, {'qid': 'easy-yelp-0070', 'question': 'Does Dentists On Washington require appointment in area with postal code 19147, Philadelphia, PA?', 'answer': 'Yes'}, {'qid': 'easy-yelp-0071', 'question': \"Does Sonny's BBQ require appointment in area with postal code 33543, Wesley Chapel, FL?\", 'answer': 'No'}, {'qid': 'easy-yelp-0072', 'question': 'Does Philippine Smoked BBQ Grill require appointment in area with postal code 19805, Wilmington, DE?', 'answer': 'No'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"Does {} require appointment in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    attributes = sub_table.iloc[0][\"attributes\"]\n",
    "    if attributes != None:\n",
    "        if \"ByAppointmentOnly\" in attributes:\n",
    "            if attributes[\"ByAppointmentOnly\"] == \"True\":\n",
    "                answer = \"Yes\"\n",
    "            else:\n",
    "                answer = \"No\"\n",
    "        else:\n",
    "            answer = \"No\"\n",
    "    else:\n",
    "        answer = \"No\"\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0072', 'question': 'Does Philippine Smoked BBQ Grill require appointment in area with postal code 19805, Wilmington, DE?', 'answer': 'No'}, {'qid': 'easy-yelp-0073', 'question': 'What are the hours of operation for Panera Bread in area with postal code 33566, Plant City, FL?', 'answer': 'Monday: 7:0-20:30, Tuesday: 7:0-20:30, Wednesday: 7:0-20:30, Thursday: 7:0-20:30, Friday: 7:0-20:30, Saturday: 7:0-20:30, Sunday: 7:0-20:30'}, {'qid': 'easy-yelp-0074', 'question': 'What are the hours of operation for Smoothie King in area with postal code 33781, Pinellas Park, FL?', 'answer': 'Monday: 0:0-0:0, Tuesday: 7:0-21:0, Wednesday: 7:0-19:0, Thursday: 10:0-16:0, Friday: 7:0-19:0, Saturday: 9:0-20:0, Sunday: 11:0-18:0'}, {'qid': 'easy-yelp-0075', 'question': 'What are the hours of operation for K9 Athletic Club in area with postal code 63110, Saint Louis, MO?', 'answer': 'Monday: 6:30-18:30, Tuesday: 6:30-18:30, Wednesday: 6:30-18:30, Thursday: 6:30-18:30, Friday: 6:30-18:30, Saturday: 9:0-17:0'}, {'qid': 'easy-yelp-0076', 'question': 'What are the hours of operation for Ms Goody Cupcake in area with postal code 19148, Philadelphia, PA?', 'answer': 'Tuesday: 11:0-19:0, Wednesday: 11:0-19:0, Thursday: 11:0-19:0, Friday: 11:0-21:0, Saturday: 11:0-21:0'}, {'qid': 'easy-yelp-0077', 'question': 'What are the hours of operation for Briggs Auction in area with postal code 19060, Garnet Valley, PA?', 'answer': 'Thursday: 16:0-19:0, Friday: 9:0-23:0'}, {'qid': 'easy-yelp-0078', 'question': 'What are the hours of operation for Ocean Breeze Cleaning and Handyman Service in area with postal code 89434, Sparks, NV?', 'answer': 'Monday: 0:0-0:0, Tuesday: 7:0-17:0, Wednesday: 7:0-17:0, Thursday: 7:0-12:0, Friday: 7:0-12:0, Saturday: 7:0-12:0'}, {'qid': 'easy-yelp-0079', 'question': 'What are the hours of operation for Pennsylvania Soup & Seafood House in area with postal code 18901, Doylestown, PA?', 'answer': 'Monday: 11:0-15:0, Tuesday: 11:0-15:0, Wednesday: 11:0-20:0, Thursday: 11:0-19:0, Friday: 11:0-20:0, Saturday: 11:0-20:0'}, {'qid': 'easy-yelp-0080', 'question': \"What are the hours of operation for Jimmy John's in area with postal code 62269, O'Fallon, IL?\", 'answer': 'Monday: 11:0-22:0, Tuesday: 11:0-22:0, Wednesday: 11:0-22:0, Thursday: 11:0-22:0, Friday: 11:0-22:0, Saturday: 11:0-22:0, Sunday: 11:0-22:0'}, {'qid': 'easy-yelp-0081', 'question': \"What are the hours of operation for Chef JJ's Back Yard in area with postal code 46220, Indianapolis, IN?\", 'answer': 'Monday: 0:0-0:0, Tuesday: 12:0-18:0, Wednesday: 12:0-18:0, Thursday: 12:0-18:0, Friday: 12:0-18:0, Saturday: 12:0-17:0'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = None\n",
    "    while answer == None:\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        name = row[\"name\"]\n",
    "        city = row[\"city\"]\n",
    "        state = row[\"state\"]\n",
    "        postal_code = row[\"postal_code\"]\n",
    "        question = \"What are the hours of operation for {} in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "        sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "        hours = sub_table.iloc[0][\"hours\"]\n",
    "        if hours == None:\n",
    "            answer = None\n",
    "        else:\n",
    "            days = list(hours.keys())\n",
    "            times = list(hours.values())\n",
    "            answer = []\n",
    "            for j in range(len(hours)):\n",
    "                answer.append(\"{}: {}\".format(days[j], times[j]))\n",
    "            answer = \", \".join(answer)\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0081', 'question': \"What are the hours of operation for Chef JJ's Back Yard in area with postal code 46220, Indianapolis, IN?\", 'answer': 'Monday: 0:0-0:0, Tuesday: 12:0-18:0, Wednesday: 12:0-18:0, Thursday: 12:0-18:0, Friday: 12:0-18:0, Saturday: 12:0-17:0'}, {'qid': 'easy-yelp-0082', 'question': 'What categories does The Piper Pub & Grill belong to, in area with postal code 83702, Boise, ID?', 'answer': 'Nightlife, Pubs, Bars, American (Traditional), Restaurants'}, {'qid': 'easy-yelp-0083', 'question': 'What categories does Clemens Chimney Sweep belong to, in area with postal code 19053, Feasterville Trevose, PA?', 'answer': 'Chimney Sweeps, Home Services, Masonry/Concrete, Contractors'}, {'qid': 'easy-yelp-0084', 'question': 'What categories does Two Men and a Truck St. Louis East belong to, in area with postal code 62234, Collinsville, IL?', 'answer': 'Home Services, Movers, Shopping, Packing Services, Packing Supplies'}, {'qid': 'easy-yelp-0085', 'question': 'What categories does Tacos Jalisco belong to, in area with postal code 89502, Reno, NV?', 'answer': 'Mexican, Restaurants'}, {'qid': 'easy-yelp-0086', 'question': 'What categories does Sandbar Patio Bar & Grill belong to, in area with postal code 83714, Boise, ID?', 'answer': 'Food, Restaurants, American (Traditional)'}, {'qid': 'easy-yelp-0087', 'question': \"What categories does Buddy's Burgers - Kennett Square belong to, in area with postal code 19348, Kennett Square, PA?\", 'answer': 'Burgers, Fast Food, Restaurants'}, {'qid': 'easy-yelp-0088', 'question': 'What categories does Highway Tire & Auto Service belong to, in area with postal code 08060, Mount Holly, NJ?', 'answer': 'Tires, Towing, Automotive, Oil Change Stations, Auto Repair'}, {'qid': 'easy-yelp-0089', 'question': \"What categories does Baily's Dairy belong to, in area with postal code 19382, West Chester, PA?\", 'answer': 'Food, Specialty Food'}, {'qid': 'easy-yelp-0090', 'question': 'What categories does Orvelle Nails & Spa belong to, in area with postal code 37027, Brentwood, TN?', 'answer': 'Beauty & Spas, Nail Salons'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"What categories does {} belong to, in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    categories = list(sub_table[\"categories\"].unique())\n",
    "    answer = \", \".join(categories)\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-yelp-0090', 'question': 'What categories does Orvelle Nails & Spa belong to, in area with postal code 37027, Brentwood, TN?', 'answer': 'Beauty & Spas, Nail Salons'}, {'qid': 'easy-yelp-0091', 'question': 'What are the coordinates of Holiday Inn Express Nashville W I40/Whitebridge Rd in area with postal code 37209, Nashville, TN?', 'answer': '36.1484039504, -86.8590614945'}, {'qid': 'easy-yelp-0092', 'question': \"What are the coordinates of Kary's Beauty Salon in area with postal code 19148, Philadelphia, PA?\", 'answer': '39.9288599, -75.1566897'}, {'qid': 'easy-yelp-0093', 'question': 'What are the coordinates of Ada County in area with postal code 83704, Boise, ID?', 'answer': '43.6089445, -116.2873523'}, {'qid': 'easy-yelp-0094', 'question': 'What are the coordinates of Chal-Brit Beverages in area with postal code 18914, Chalfont, PA?', 'answer': '40.2893931, -75.1994282'}, {'qid': 'easy-yelp-0095', 'question': 'What are the coordinates of Italian Bistro in area with postal code 19152, Philadelphia, PA?', 'answer': '40.0731939, -75.0352627'}, {'qid': 'easy-yelp-0096', 'question': 'What are the coordinates of D & B Tailors in area with postal code 19073, Newtown Square, PA?', 'answer': '39.9875334, -75.4001299'}, {'qid': 'easy-yelp-0097', 'question': 'What are the coordinates of Zachs Famous Donair in area with postal code T6K 3L6, Edmonton, AB?', 'answer': '53.4722697, -113.4504354'}, {'qid': 'easy-yelp-0098', 'question': 'What are the coordinates of Nicholson Cleaners in area with postal code 37203, Nashville, TN?', 'answer': '36.1483514, -86.8079997'}, {'qid': 'easy-yelp-0099', 'question': 'What are the coordinates of NTB-National Tire & Battery in area with postal code 37027, Brentwood, TN?', 'answer': '35.9617839364, -86.8168482184'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    random_index = random.randint(0, len(data)-1)\n",
    "    row = data.iloc[random_index]\n",
    "    name = row[\"name\"]\n",
    "    city = row[\"city\"]\n",
    "    state = row[\"state\"]\n",
    "    postal_code = row[\"postal_code\"]\n",
    "    question = \"What are the coordinates of {} in area with postal code {}, {}, {}?\".format(name, postal_code, city, state)\n",
    "    sub_table = data.loc[(data[\"name\"] == name) & (data[\"city\"] == city) & (data[\"state\"] == state) & (data[\"postal_code\"] == postal_code)]\n",
    "    latitude = str(sub_table.iloc[0][\"latitude\"])\n",
    "    longitude = str(sub_table.iloc[0][\"longitude\"])\n",
    "    answer = latitude + \", \" + longitude\n",
    "    questions.append({\"qid\": \"easy-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/questions/easy/yelp-easy.jsonl', mode='w') as writer:\n",
    "    for row in questions:\n",
    "        writer.write(row)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
